notebook.community

Edit and run



In [1]:

    
import os
from datetime import datetime
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt



In [2]:

    
os.chdir("..")



In [3]:

    
from Signals.BitcoinData import BitcoinData
from Signals.WikipediaData import WikipediaData
from Signals.FXData import FXData

Download data for relevant range and normalize.



In [4]:

    
bd = BitcoinData()
bitcoin = bd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))



In [5]:

    
wd = WikipediaData()
wiki = wd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))



In [6]:

    
fxd = FXData()
fx = fxd.get(datetime(2016, 1, 1), datetime(2016, 6, 1))



In [7]:

    
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
           .merge(wiki, how='outer', left_index=True, right_index=True)

Fill forward missing data.



In [8]:

    
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)

Add transformations.



In [9]:

    
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)



In [10]:

    
def addlogret(df, col):
    df[col+'_logret'] = np.log(df[col]) - np.log(df[col].shift(1))
def addewma(df, col):
    for i in [3, 10, 30]:
        df[col+'_ewma_'+str(i)] = df[col].ewm(halflife=i).mean()
def addewmvar(df, col):
    for i in [3, 10, 30]:
        df[col+'_ewmvar_'+str(i)] = df[col].ewm(halflife=i).var()



In [11]:

    
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
    addlogret(x, col)
    addewma(x, col)
    addewma(x, col+"_logret")
    addewmvar(x, col)



In [12]:

    
x.dropna(inplace=True)

Build logistic regression model.



In [13]:

    
y = x['next_day_higher']



In [14]:

    
x.drop(['next_day_higher'], axis=1, inplace=True)



In [15]:

    
from sklearn import linear_model



In [16]:

    
logreg = linear_model.LogisticRegression(C=1e5)
logreg.fit(x, y)









    Out[16]:





LogisticRegression(C=100000.0, class_weight=None, dual=False,
          fit_intercept=True, intercept_scaling=1, max_iter=100,
          multi_class='ovr', n_jobs=1, penalty='l2', random_state=None,
          solver='liblinear', tol=0.0001, verbose=0, warm_start=False)



In [17]:

    
print("Model score: {:.1f}%".format(100*logreg.score(x, y)))









    



Model score: 56.5%

Test predictions.



In [18]:

    
from PerfAnalysis.PnL import PnL



In [19]:

    
pnl = PnL()
pnl.calc_pnl(x, y, price_col="Open")









    Out[19]:





{'APR': 212.82985561414253,
 'cash': 0,
 'position': 0.51604581259045823,
 'value': 269.0353239359095}

213% (annualized) return over the training set, even including 25 bp commission, but how about the final six months of the year?



In [20]:

    
bitcoin = bd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
wiki = wd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
fx = fxd.get(datetime(2016, 6, 1), datetime(2017, 1, 1))
x = bitcoin.merge(fx, how='outer', left_index=True, right_index=True)\
           .merge(wiki, how='outer', left_index=True, right_index=True)
x['USDCNY'].fillna(method='pad', inplace=True)
x['USDEUR'].fillna(method='pad', inplace=True)
x['VIX'].fillna(method='pad', inplace=True)
x['next_open'] = x['Open'].shift(-1)
x['change'] = x['next_open'] - x['Open']
x['next_day_higher'] = x['change'].map(lambda x: x > 0)
for col in ['Open', 'Volume', 'USDCNY', 'USDEUR', 'VIX', 'views']:
    addlogret(x, col)
    addewma(x, col)
    addewma(x, col+"_logret")
    addewmvar(x, col)
x.drop(['next_day_higher'], axis=1, inplace=True)
x.dropna(inplace=True)



In [21]:

    
pred = logreg.predict(x)



In [22]:

    
pnl.calc_pnl(x, pred, price_col="Open")









    Out[22]:





{'APR': -111.22094549159256,
 'cash': 74.649234306898478,
 'position': 0,
 'value': 74.649234306898478}

A ha, much less successful on the test dataset.